These data are stored in an STIDF structure — a class for unstructured spatio-temporal data with n spatial locations and times, where n observations are available — from the spacetime package.
The 0s actually reflect that there were no observations at that time - in other words, they are not false absences, and should not be removed.
## class : SpatialPoints
## features : 1
## extent : 1189877, 1189877, 459784, 459784 (xmin, xmax, ymin, ymax)
## crs : +proj=lcc +lat_0=44 +lon_0=-70 +lat_1=50 +lat_2=46 +x_0=800000 +y_0=0 +datum=NAD83 +units=m +no_defs
## timeIndex
## [1,] 1
## [2,] 2
## [3,] 3
## [4,] 4
## [5,] 5
## [6,] 6
## [1] "1971-09-07 08:25:00 ADT" "1971-09-07 11:15:00 ADT"
## [3] "1971-09-07 12:45:00 ADT" "1971-09-07 15:05:00 ADT"
## [5] "1971-09-08 08:25:00 ADT" "1971-09-08 10:25:00 ADT"
# Exploratory plots: White hake counts over time, then in space over the gulf
hake_counts <- number@data$`WHITE HAKE`
plot(number@endTime, hake_counts, type = "l")
plot(number@sp, pch = 19, cex = hake_counts / 300)
plot(gulf, add = TRUE)
# Attach coordinate and time columns to an STIDF's data slot.
#
# Args:
#   stidf_object: an STIDF (spacetime package) with @data, @sp@coords and
#     @endTime slots.
# Returns: the @data data frame with four new columns appended in order:
#   longitude, latitude (from @sp@coords), time (calendar date of @endTime)
#   and year — ready for conversion to sf with st_as_sf().
add_coords <- function(stidf_object){
  out <- stidf_object@data
  xy <- stidf_object@sp@coords
  out$longitude <- xy[, 1]
  out$latitude <- xy[, 2]
  out$time <- lubridate::date(stidf_object@endTime)
  out$year <- lubridate::year(stidf_object@endTime)
  out
}
# Convert datasets to sf objects
explan_sf <- st_as_sf(add_coords(explan),
                      coords = c("longitude", "latitude"))
# carry the coordinate reference system over from the original objects
st_crs(explan_sf) <- explan@sp@proj4string
gulf_sf <- st_as_sf(gulf)
st_crs(gulf_sf) <- gulf@proj4string
# maps
# ggplot maps of the explanatory variables.
# The three maps were near-identical copy-pastes, so the shared layers are
# factored into a helper. CONSISTENCY FIX: p3 previously omitted
# fill = "white" on the gulf polygon; all three maps now match.
# pick a sequence of years to check out
plot_years <- c(1971, 1981, 1992, 2001, 2011, 2019)

# Faceted map of one explanatory variable across plot_years.
#   var:   column name of explan_sf, as a string
#   label: legend title
map_explan <- function(var, label) {
  ggplot() +
    geom_sf(data = gulf_sf, fill = "white") +
    geom_sf(data = filter(explan_sf, year %in% plot_years),
            aes(col = .data[[var]])) +
    facet_wrap(~year) +
    scale_color_viridis_c(option = "viridis") +
    labs(color = label) +
    theme_void()
}

p1 <- map_explan("bottom.temperature", "Bottom \ntemperature")
p2 <- map_explan("bottom.salinity", "Bottom \nsalinity")
p3 <- map_explan("depth", "Depth")
# patchwork together!
p1 / p2 / p3
The mesh size should reflect the range of the spatial coordinates. Changing `maxEdge` changes the resolution: adjusting it (e.g. to a value like 35000) can give a more appropriate mesh for this extent.
# Spatial mesh: target triangle edge length, in metres (the CRS above uses
# +units=m)
maxEdge <- 25000
# Constrained 2D triangulation over the gulf boundary for the SPDE approach:
# finer triangles inside (0.5 * maxEdge), coarser in the outer extension
# (2 * maxEdge); offset extends the mesh beyond the boundary; cutoff merges
# boundary points closer together than maxEdge/2.
meshSpace <- inla.mesh.2d(boundary = gulf,
max.edge = maxEdge*c(0.5,2),
offset = c(10000,20000),
cutoff = maxEdge/2)
# plot points under the mesh, to see if it makes sense
par(mar = c(1,1,1,1))
plot(number@sp, pch = 19, cex = 0.2, col = "red")
plot(gulf, add = TRUE)
plot(meshSpace, main = "", asp = 1, add = TRUE)
# plot abundances under the mesh too (point size scaled by White hake count)
plot(number@sp, pch = 19, cex = number@data$`WHITE HAKE`/300, col = "red")
plot(gulf, add = TRUE)
plot(meshSpace, main = "", asp = 1, add = TRUE)
# Number of mesh nodes (meshSpace$n counts vertices, not edges)
meshSpace$n
## [1] 470
### Time
# Convert POSIXct end times to numeric minutes since Jan 1, 1970
time <- as.numeric(number@endTime)/60 # Number of minutes since Jan 1, 1970
# Shift so the series starts near 1.
# NOTE(review): 884844 is a hard-coded offset — presumably min(time) - 1 for
# this dataset; verify against the data (min(time) - 1 would be more robust).
timeBasis <- time - 884844 # Time starting at 1
# Division points: timeGr evenly spaced knots spanning the observed times
timeGr <- 11
timeSeq <- seq(min(timeBasis),
max(timeBasis),
length = timeGr)
# 1D temporal mesh on those knots
meshTime <- inla.mesh.1d(timeSeq)
# ^ This divides time into 11 bins of minutes since 1970.
#timeGr <- 2
# try with two extreme time points, just to simplify the model run
#meshTime <- inla.mesh.1d(c(min(timeBasis), max(timeBasis)))
How many estimations will be carried out?
# Total number of space-time basis coefficients to estimate:
# spatial mesh nodes x temporal knots
meshSpace$n * meshTime$n
## [1] 5170
# NOTE(review): the output line below does not match the expression above —
# it appears to be leftover output from a different (removed) expression.
## [1] 34.64238
# Temporal autocorrelation prior
# PC prior on the AR(1) correlation; param = c(0.2, 0.9) encodes the prior
# statement P(correlation > 0.2) = 0.9 — TODO confirm against the pccor1 docs
hSpec <- list(theta=list(prior='pccor1', param=c(0.2, 0.9)))
# assuming not a lot of temporal autocorrelation (0.2) and we are fairly sure of that (0.9)
# Precision likelihood
## (negative binomial or any distribution that has more than one parameter)
# precPrior <- list(prior='pc.prec', param=c(1, 0.01))
# guessing it's 1 with a confidence of .01, which is super low.
# note ^ is not useful if using a Poisson.
# Index set for the space-time random field: SPDE$n.spde spatial basis
# functions replicated over each of the meshTime$n temporal groups.
# NOTE(review): SPDE is defined elsewhere in this file (not visible here).
Field <- inla.spde.make.index("field",
n.spde = SPDE$n.spde,
n.group = meshTime$n)
# For estimation
# Projector matrix mapping each observation (location + time) onto the
# space-time basis
A <- inla.spde.make.A(meshSpace,
loc = coordinates(number@sp),
group = timeBasis,
group.mesh = meshTime)
# One projector per effect in the stack: A for the random field, scalar 1
# (identity) for each of the three covariates
Alist <- as.list(rep(1,4)) # list with single values
Alist[[1]] <- A
Here, we’re using two variables about the bottom conditions, because White hake is a groundfish.
# Effects list, aligned element-for-element with Alist: the space-time field
# index first, then the three bottom-condition covariates
effect <- list(Field,
bTemp = explan@data$bottom.temperature,
bSal = explan@data$bottom.salinity,
bDep = explan@data$depth)
# Bundle response, projector matrices and effects for INLA estimation
Stack <- inla.stack(data=list(whitehake = number@data$`WHITE HAKE`),
A = Alist,
effects = effect,
tag="basis")
# build candidate models --- only run with two time points. otherwise, eternal
# All candidates share the same space-time random effect: the SPDE spatial
# field grouped by temporal knot, with AR(1) dependence between groups
# (hyper = hSpec, the PC prior defined above). They differ only in which
# fixed effects are included; `~ 0` removes the intercept.
# Null model: random field only
form_0 <- whitehake ~ 0 +
f(field, model=SPDE, group = field.group,
control.group=list(model='ar1', hyper=hSpec))
# Single covariates: temperature / salinity / depth
form_1 <- whitehake ~ 0 + bTemp +
f(field, model=SPDE, group = field.group,
control.group=list(model='ar1', hyper=hSpec))
form_2 <- whitehake ~ 0 + bSal +
f(field, model=SPDE, group = field.group,
control.group=list(model='ar1', hyper=hSpec))
form_3 <- whitehake ~ 0 + bDep +
f(field, model=SPDE, group = field.group,
control.group=list(model='ar1', hyper=hSpec))
# Pairs of covariates
form_4 <- whitehake ~ 0 + bTemp + bSal +
f(field, model=SPDE, group = field.group,
control.group=list(model='ar1', hyper=hSpec))
form_5 <- whitehake ~ 0 + bTemp + bDep +
f(field, model=SPDE, group = field.group,
control.group=list(model='ar1', hyper=hSpec))
form_6 <- whitehake ~ 0 + bSal + bDep +
f(field, model=SPDE, group = field.group,
control.group=list(model='ar1', hyper=hSpec))
# Full model: all three covariates
form_7 <- whitehake ~ 0 + bTemp + bSal + bDep +
f(field, model=SPDE, group = field.group, # how they're grouped
control.group=list(model='ar1', hyper=hSpec)) # build with model parameters
forms_ls <- list(form_0, form_1, form_2, form_3, form_4, form_5, form_6, form_7)
# Evaluate each candidate model (Gaussian likelihood, identity link).
# compute = TRUE keeps fitted values; config = TRUE keeps the internal
# representation for later sampling. WAIC is computed for model comparison.
# (seq_along replaces the 1:length() anti-pattern.)
model_ls <- vector("list", length = length(forms_ls))
for (i in seq_along(forms_ls)) {
  model_ls[[i]] <- inla(forms_ls[[i]],
                        data = inla.stack.data(Stack),
                        family = "gaussian",
                        control.family = list(link = "identity"),
                        control.predictor = list(A = inla.stack.A(Stack),
                                                 compute = TRUE,
                                                 link = 1),
                        control.compute = list(waic = TRUE, config = TRUE))
}
# Extract WAIC from each fitted model to compare.
# BUG FIX: eight models were fitted (form_0 .. form_7) but the original
# data.frame used model = 1:7, silently dropping the last model from the
# comparison. Note model i here corresponds to formula form_(i - 1).
waic_df <- data.frame(model = seq_along(model_ls),
                      waic = vapply(model_ls,
                                    function(m) m$waic$waic,
                                    numeric(1)))
# Model with the lowest WAIC is best supported by the data
waic_df[which.min(waic_df$waic), ]
#saveRDS(model_gaussian, "outputs/whitehake_model.RDS")
# Refit the selected model: bottom temperature + bottom salinity fixed
# effects plus the space-time random field (no intercept).
form <- whitehake ~ 0 + bTemp + bSal +
f(field, # temporal autocorr
model=SPDE, # spatial autocorr structure
group = field.group, # how they're grouped
control.group=list(model='ar1', hyper=hSpec) # build with model parameters
)
# Gaussian likelihood with identity link; keep fitted values (compute = TRUE)
# and the internal representation (config = TRUE) for prediction later.
model_gaussian <- inla(form,
data = inla.stack.data(Stack),
family="gaussian",
control.family = list(link="identity"),
control.predictor = list(A = inla.stack.A(Stack),
compute = TRUE,
link = 1),
control.compute = list(waic = TRUE,
config = TRUE))
summary(model_gaussian)
##
## Call:
## c("inla(formula = form, family = \"gaussian\", data =
## inla.stack.data(Stack), ", " control.compute = list(waic = TRUE, config
## = TRUE), control.predictor = list(A = inla.stack.A(Stack), ", " compute
## = TRUE, link = 1), control.family = list(link = \"identity\"))" )
## Time used:
## Pre = 5.64, Running = 312, Post = 3.38, Total = 321
## Fixed effects:
## mean sd 0.025quant 0.5quant 0.975quant mode kld
## bTemp 1.215 0.158 0.905 1.215 1.524 1.215 0
## bSal -0.054 0.027 -0.107 -0.054 -0.001 -0.054 0
##
## Random effects:
## Name Model
## field SPDE2 model
##
## Model hyperparameters:
## mean sd 0.025quant 0.5quant
## Precision for the Gaussian observations 1.00e-03 0.000 1.00e-03 1.00e-03
## Range for field 1.13e+04 191.155 1.09e+04 1.13e+04
## Stdev for field 3.25e+01 0.480 3.16e+01 3.25e+01
## GroupRho for field 7.47e-01 0.006 7.34e-01 7.48e-01
## 0.975quant mode
## Precision for the Gaussian observations 1.00e-03 1.00e-03
## Range for field 1.17e+04 1.13e+04
## Stdev for field 3.35e+01 3.24e+01
## GroupRho for field 7.57e-01 7.50e-01
##
## Expected number of effective parameters(stdev): 692.70(13.71)
## Number of equivalent replicates : 9.51
##
## Watanabe-Akaike information criterion (WAIC) ...: 63501.23
## Effective number of parameters .................: 713.59
##
## Marginal log-Likelihood: -32011.66
## Posterior marginals for the linear predictor and
## the fitted values are computed
Efxplot(list(model_gaussian)) + theme_classic()
## Loading required package: MCMCglmm
## Loading required package: coda
## Loading required package: ape
##
## Attaching package: 'ape'
## The following objects are masked from 'package:raster':
##
## rotate, zoom
# from Coding Club tutorial: "NB: There are no P values in INLA. Importance or significance of variables can be deduced by examining the overlap of their 2.5% and 97.5% posterior estimates with zero."
# Posterior mean fitted value for each observation: the first
# nrow(number@data) rows of the stack correspond to the observations
ExpY <- model_gaussian$summary.fitted.values[1:nrow(number@data),"mean"]
#For the variance we also need the posterior mean value of θ, which is obtained via
# NOTE(review): hyperparameter 1 of this fit is "Precision for the Gaussian
# observations" (see summary above), not a dispersion parameter — confirm
# this is the intended quantity.
phi1 <- model_gaussian$summary.hyper[1, "mean"]
#The variance is then given by
# NOTE(review): ExpY * (1 - ExpY) / (1 + phi1) is the beta-distribution
# variance form; for a Gaussian likelihood Var = 1/precision. With fitted
# values outside [0, 1] this expression goes negative, which produces the
# sqrt() NaN warning below.
VarY <- ExpY * (1 - ExpY) / (1 + phi1)
# And once we have the mean and the variance we can calculate Pearson residuals.
E1 <- (number@data$`WHITE HAKE` - ExpY) / sqrt(VarY)
## Warning in sqrt(VarY): NaNs produced
# Apply model validation
# Figure 23.11
par(mfrow = c(1,1), mar = c(5,5,2,2), cex.lab = 1.5)
# Residuals vs fitted; xlim = c(0, 1) clips larger fitted values from view
plot(x = model_gaussian$summary.fitted.values$mean[1:nrow(number@data)],
y = E1,
xlab = "Fitted values",
ylab = "Pearson residuals",
xlim = c(0, 1))
abline(h = 0, lty = 2)
# note: there's a value at 130...
This is the output you would get from a normal linear model. - mode: mode of the parameter's posterior distribution - kld: the Kullback–Leibler divergence between the Gaussian approximation and the full posterior marginal for each effect; values near 0 indicate the approximation is accurate.
Field group is the random effect we set earlier.
Distribution for the parameter picked with mean, sd, and credible interval
# Dimension of the prediction raster
stepsize <- 1000 # choose cell size: 1000m x 1000m
rangeX <- range(meshSpace$loc[,1])
rangeY <- range(meshSpace$loc[,2])
# Number of cells in x and y implied by the mesh extent and the cell size
nxy <- round(c(diff(rangeX),
               diff(rangeY)) / stepsize)
# Define basis of the map: projector from mesh nodes onto a regular grid.
# BUG FIX: nxy was computed but never used, so inla.mesh.projector fell back
# to its default dims (100 x 100) and stepsize had no effect. Passing
# dims = nxy makes the grid actually use ~1 km cells.
mapBasis <- inla.mesh.projector(meshSpace,
                                xlim = rangeX,
                                ylim = rangeY,
                                dims = nxy,
                                crs = crs(gulf))
# Calculate prediction
# One raster per temporal knot for the posterior mean and the 95% credible
# interval bounds of the latent field
mapMean <- vector("list", length = timeGr)
map.025 <- vector("list", length = timeGr)
map.975 <- vector("list", length = timeGr)
# for every time point
for(i in 1:timeGr){
# Model prediction
# Field coefficients are stored group-by-group: temporal group i occupies
# indices (i - 1) * SPDE$n.spde + 1 .. i * SPDE$n.spde
fitMean <- inla.mesh.project(mapBasis,
model_gaussian$summary.random$field$mean[1:SPDE$n.spde + (i - 1) * SPDE$n.spde])
fit.025 <- inla.mesh.project(mapBasis,
model_gaussian$summary.random$field$`0.025quant`[1:SPDE$n.spde + (i - 1) * SPDE$n.spde])
fit.975 <- inla.mesh.project(mapBasis,
model_gaussian$summary.random$field$`0.975quant`[1:SPDE$n.spde + (i - 1) * SPDE$n.spde])
# Build maps with confidence intervals
# (transpose + column reversal converts the projector's matrix orientation
# into raster's top-down, row-major layout)
mapMean[[i]] <- raster(t(fitMean[,ncol(fitMean):1]),
xmn = min(mapBasis$x), xmx = max(mapBasis$x),
ymn = min(mapBasis$y), ymx = max(mapBasis$y),
crs = crs(gulf))
map.025[[i]] <- raster(t(fit.025[,ncol(fit.025):1]),
xmn = min(mapBasis$x), xmx = max(mapBasis$x),
ymn = min(mapBasis$y), ymx = max(mapBasis$y),
crs = crs(gulf))
map.975[[i]] <- raster(t(fit.975[,ncol(fit.975):1]),
xmn = min(mapBasis$x), xmx = max(mapBasis$x),
ymn = min(mapBasis$y), ymx = max(mapBasis$y),
crs = crs(gulf))
}
# Convert to a rasterStack (aligns the elements of a list):
# one layer per temporal knot
rasterMean <- stack(mapMean)
raster.025 <- stack(map.025)
raster.975 <- stack(map.975)
# to check this visually:
# image(mapMean[[1]])
# plot(meshSpace, add = TRUE)
# reorganise: overwrite each layer's values with the transposed matrix so
# cell order matches the projector grid.
# BUG FIX: the original looped over 1:5 only, leaving layers 6 through
# timeGr (11) untouched; iterate over every layer instead.
for (i in seq_along(mapMean)) {
  values(rasterMean)[, i] <- as.vector(t(mapMean[[i]]))
  values(raster.025)[, i] <- as.vector(t(map.025[[i]]))
  values(raster.975)[, i] <- as.vector(t(map.975[[i]]))
}
# Mask the region to keep only the region of interest (cells outside the
# gulf polygon become NA)
rasterMeanMask <- mask(rasterMean, gulf)
raster.025Mask <- mask(raster.025, gulf)
raster.975Mask <- mask(raster.975, gulf)
# image(rasterMeanMask)
# plot(gulf, add = TRUE)
# Map the results
par(mfrow = c(2,3), mar = c(1,1,3,1))
# NOTE(review): this loop only computes zlimRange and overwrites it each
# iteration — nothing is plotted with it, so it is effectively dead code.
# Presumably the plot() calls that used zlimRange were removed or moved.
for(i in 1:timeGr){
zlimRange <- range(values(raster.025Mask[[i]]),
values(rasterMeanMask[[i]]),
values(raster.975Mask[[i]]), na.rm = TRUE)
}
# Convert each masked RasterStack to a long data frame for ggplot, tagged
# with the posterior quantile it represents. The three pipelines were
# identical copy-pastes, so they are factored into a helper.
#   r:           a masked RasterStack
#   quant_label: label for the `quant` column
stack_to_df <- function(r, quant_label) {
  as.data.frame(r, xy = TRUE, na.rm = TRUE) %>%
    pivot_longer(cols = 3:ncol(.)) %>%
    mutate(quant = quant_label)
}
raster.025Mask_df <- stack_to_df(raster.025Mask, "Lower (0.025)")
rasterMeanMask_df <- stack_to_df(rasterMeanMask, "Mean")
raster.975Mask_df <- stack_to_df(raster.975Mask, "Higher (0.975)")
# bind together
raster_df <- rbind(raster.025Mask_df, rasterMeanMask_df, raster.975Mask_df)
raster_df$quant <- factor(raster_df$quant,
                          levels = c("Lower (0.025)", "Mean", "Higher (0.975)"))
# Order layer names numerically (layer.1 .. layer.timeGr) instead of
# alphabetically; generated from timeGr rather than hard-coding 11 strings.
# TODO: label with the actual years instead of layer indices.
raster_df$name <- factor(raster_df$name,
                         levels = paste0("layer.", seq_len(timeGr)))
# Animated map of the posterior mean abundance, one frame per time group.
# FIX: removed the empty trailing argument in transition_states(..., );
# the two labs() calls are merged into one.
p <- ggplot() +
  geom_raster(data = filter(raster_df, quant == "Mean"),
              aes(x = x, y = y, fill = value)) +
  labs(fill = "Abundance",
       title = 'Time group: {closest_state}') +
  # symmetric limits so the diverging palette is centred on zero
  scale_fill_distiller(palette = "RdBu",
                       limits = c(-max(abs(raster_df$value)),
                                  max(abs(raster_df$value)))) +
  #facet_wrap(~quant) +
  theme_void() +
  gganimate::transition_states(name, transition_length = 0.6)
p
# save!
gganimate::anim_save("predictions.gif", animation = p, path = "figures/")
Each time step:
# Static small-multiples version: every time step (rows) by posterior
# quantile (columns), sharing one symmetric colour scale
p2 <- ggplot() +
  geom_raster(data = raster_df,
              aes(x = x, y = y, fill = value)) +
  scale_fill_distiller(palette = "RdBu",
                       limits = max(abs(raster_df$value)) * c(-1, 1)) +
  labs(fill = "Abundance") +
  facet_grid(name ~ quant) +
  theme_void()
p2